In [1]:
    
import pandas as pd
import numpy as np
from sklearn.preprocessing import scale
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
    
In [2]:
    
# Load the pickled object stored at the relative path 'claims_df' (resolved against
# the current working directory); it is used below as a DataFrame.
# NOTE(review): unpickling can execute arbitrary code — only load this file if it
# comes from a trusted source.
df = pd.read_pickle('claims_df')
    
In [3]:
    
# Select the eleven SP_* columns that feed the first PCA / LDA below.
pca_df = df.loc[:, [
    'SP_ALZHDMTA', 'SP_CHF', 'SP_CHRNKIDN', 'SP_CNCR', 'SP_COPD', 'SP_DEPRESSN',
    'SP_DIABETES', 'SP_ISCHMCHT', 'SP_OSTEOPRS', 'SP_RA_OA', 'SP_STRKETIA',
]]
    
In [4]:
    
# Standardize every selected column with sklearn's `scale` (default settings).
# Carry the source row index and column labels over so the scaled frame stays
# aligned with `df`; a bare pd.DataFrame(ndarray) would fall back to 0..n-1 labels.
pca_vals = pd.DataFrame(scale(pca_df), index=pca_df.index, columns=pca_df.columns)
    
In [5]:
    
# Give the scaled frame the same column labels as the unscaled selection.
pca_vals.columns = pca_df.columns
    
In [6]:
    
# Unfitted PCA estimator; n_components=None (the default) keeps every component.
pca = PCA(n_components=None)
    
In [7]:
    
# Fit the PCA on the standardized columns; the cell output is the fitted estimator.
pca.fit(X=pca_vals)
    
    Out[7]:
In [8]:
    
# Cumulative share of variance explained by the first k principal components.
# Plot against k = 1..n so the x-axis reads directly as "number of components"
# (plotting the bare array would start the axis at 0), and label the figure so it
# stands on its own.
pca_cum_var = np.cumsum(pca.explained_variance_ratio_)
plt.plot(np.arange(1, len(pca_cum_var) + 1), pca_cum_var)
plt.xlabel('Number of components')
plt.ylabel('Cumulative explained variance ratio')
plt.title('PCA: cumulative explained variance (SP_* columns)');
    
    
In [9]:
    
# Heatmap of the fitted component loadings (one row per component, one column per
# input feature). Wrapping the array in a DataFrame lets seaborn label the x-axis
# with the feature names instead of positional integers.
sns.heatmap(pd.DataFrame(pca.components_, columns=pca_df.columns), annot=True);
    
    
In [10]:
    
# Unfitted LDA estimator using the library's default solver, spelled out explicitly.
lda = LinearDiscriminantAnalysis(solver='svd')
    
In [11]:
    
# Fit LDA on the standardized SP_* columns using df.TOTAL_PAID as the class label.
# NOTE(review): LinearDiscriminantAnalysis is a classifier, so the target is treated
# as discrete classes. If TOTAL_PAID is a continuous amount (not visible from here),
# this will either fail or create one class per distinct value — confirm the target
# is categorical or bin it first.
lda.fit(pca_vals, df.TOTAL_PAID)
    
    Out[11]:
In [12]:
    
# Cumulative share of variance explained by the first k discriminant components.
# Plot against k = 1..n so the x-axis reads directly as "number of components"
# (plotting the bare array would start the axis at 0), and label the figure so it
# stands on its own.
lda_cum_var = np.cumsum(lda.explained_variance_ratio_)
plt.plot(np.arange(1, len(lda_cum_var) + 1), lda_cum_var)
plt.xlabel('Number of discriminant components')
plt.ylabel('Cumulative explained variance ratio')
plt.title('LDA: cumulative explained variance');
    
    
In [13]:
    
# Select the nine payment columns (three each with the _IP, _OP and _CAR suffixes)
# that feed the second PCA.
pmt = df.loc[:, [
    'MEDREIMB_IP', 'BENRES_IP', 'PPPYMT_IP',
    'MEDREIMB_OP', 'BENRES_OP', 'PPPYMT_OP',
    'MEDREIMB_CAR', 'BENRES_CAR', 'PPPYMT_CAR',
]]
    
In [14]:
    
# Standardize every payment column with sklearn's `scale` (default settings).
# Carry the source row index and column labels over so the scaled frame stays
# aligned with `df`; a bare pd.DataFrame(ndarray) would fall back to 0..n-1 labels.
pmt_norm = pd.DataFrame(scale(pmt), index=pmt.index, columns=pmt.columns)
    
In [15]:
    
# Give the scaled payment frame the same column labels as the unscaled selection.
pmt_norm.columns = pmt.columns
    
In [16]:
    
# Unfitted PCA for the payment columns. Every component is kept
# (an earlier variant of this cell capped it at n_components=5).
pca_pmt = PCA(n_components=None)
    
In [17]:
    
# Fit the payment PCA on the standardized payment columns; the cell output is the
# fitted estimator.
pca_pmt.fit(X=pmt_norm)
    
    Out[17]:
In [18]:
    
# Project the standardized payments onto the fitted components and display the
# result as a DataFrame (not stored in a variable).
pd.DataFrame(data=pca_pmt.transform(X=pmt_norm))
    
    Out[18]:
In [19]:
    
# Keep a handle on the fitted component (loading) array of the payment PCA.
pmt_comp = pca_pmt.components_
    
In [20]:
    
# Wrap the loading array in a DataFrame so it can be labelled and displayed.
pmt_comp_df = pd.DataFrame(data=pmt_comp)
    
In [21]:
    
# Label each loading column with the payment feature it belongs to.
pmt_comp_df.columns = pmt_norm.columns
    
In [22]:
    
# Display the labelled loading table (one row per component).
pmt_comp_df
    
    Out[22]:
In [23]:
    
# Cumulative share of variance explained by the first k payment components.
# Uses explained_variance_ratio_ (fractions summing to 1) rather than the absolute
# explained_variance_, so this curve is on the same 0..1 scale as the other
# cumulative-variance plots in this notebook.
# Plot against k = 1..n so the x-axis reads directly as "number of components".
pmt_cum_var = np.cumsum(pca_pmt.explained_variance_ratio_)
plt.plot(np.arange(1, len(pmt_cum_var) + 1), pmt_cum_var)
plt.xlabel('Number of components')
plt.ylabel('Cumulative explained variance ratio')
plt.title('PCA: cumulative explained variance (payment columns)');
    
    
In [24]:
    
# Heatmap of the payment PCA loadings. Pass the labelled DataFrame rather than the
# raw array so seaborn puts the payment column names on the x-axis; the plotted
# values are the same.
sns.heatmap(pmt_comp_df, annot=True);
    
    
In [ ]: